# coding=utf-8
import re
import urllib
import urllib2
import cookielib

# Fetch one page of the "hot" listing and print the fields captured by the
# regular expression below for every matching entry.
page = 1
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
# A browser-like User-Agent is sent instead of urllib2's default one.
# NOTE(review): 'MISE5.5' looks like a typo for 'MSIE 5.5'; left unchanged
# so the header sent to the server stays exactly what it was -- confirm.
user_agent = 'Mozilla/4.0(compatible; MISE5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Each match yields a 3-tuple: the text inside <h2>, the text inside the
# <span> of <div class="content">, and the text inside <i class="number">
# (presumably author, post body and a counter -- verify against the page).
# Raw strings keep the regex escapes (\s) out of Python's own string-escape
# processing; re.S lets '.' span newlines so one entry can cover many lines.
pattern = re.compile(
    r'<a href.*?'
    r'<h2>\s*(.*?)\s*</h2>.*?'
    r'<div class="content">\s<span>\s*(.*?)\s*</span>.*?'
    r'<i class="number">(.*?)</i>',
    re.S)

try:
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        content = response.read().decode('utf-8')
    finally:
        # Always release the underlying connection, even if read/decode fails.
        response.close()
except urllib2.URLError as e:
    # HTTPError (a URLError subclass) carries an HTTP status in `code`;
    # URLError carries a `reason`. Print whichever attributes are present.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
else:
    # Parsing runs only after a successful download; it cannot raise URLError,
    # so it lives outside the guarded section.
    print('temp')  # progress marker kept so the script's output is unchanged
    items = pattern.findall(content)
    print(items)
    for item in items:
        print('q = : ')
        for i in item:
            print(i)
